Load the necessary libraries
library(rstanarm) #for fitting models in STAN
library(brms) #for fitting models in STAN
library(coda) #for diagnostics
library(bayesplot) #for diagnostics
library(ggmcmc) #for MCMC diagnostics
library(DHARMa) #for residual diagnostics
library(rstan) #for interfacing with STAN
library(emmeans) #for marginal means etc
library(broom) #for tidying outputs
library(tidybayes) #for more tidying outputs
library(ggeffects) #for partial plots
library(tidyverse) #for data wrangling etc
library(broom.mixed)#for summarising models
library(ggeffects) #for partial effects plots
theme_set(theme_classic()) #put the default ggplot theme back
Polis et al. (1998) were intested in modelling the presence/absence of lizards (Uta sp.) against the perimeter to area ratio of 19 islands in the Gulf of California.
Uta lizard
Format of polis.csv data file
| island | ratio | pa |
|---|---|---|
| Bota | 15.41 | 1 |
| Cabeza | 5.63 | 1 |
| Cerraja | 25.92 | 1 |
| Coronadito | 15.17 | 0 |
| .. | .. | .. |
| island | Categorical listing of the name of the 19 islands used - variable not used in analysis. |
| ratio | Ratio of perimeter to area of the island. |
| pa | Presence (1) or absence (0) of Uta lizards on island. |
The aim of the analysis is to investigate the relationship between island parimeter to area ratio and the presence/absence of Uta lizards.
polis <- read_csv('../data/polis.csv', trim_ws=TRUE) %>%
janitor::clean_names()
glimpse(polis)
## Rows: 19
## Columns: 3
## $ island <chr> "Bota", "Cabeza", "Cerraja", "Coronadito", "Flecha", "Gemelose…
## $ ratio <dbl> 15.41, 5.63, 25.92, 15.17, 13.04, 18.85, 30.95, 22.87, 12.01, …
## $ pa <dbl> 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1
Model formula: \[ \begin{align} y_i &\sim{} \mathcal{Bin}(n, p_i)\\ ln\left(\frac{p_i}{1-p_i}\right) &= \beta_0 + \beta_1 x_i\\ \beta_0 &\sim{} \mathcal{N}(0,10)\\ \beta_1 &\sim{} \mathcal{N}(0,1)\\ \end{align} \]
priors <-
prior(normal(0,10), class = "Intercept") +
prior(normal(0,1), class = "b")
polis_brm1 <- brm(bf(pa|trials(1) ~ ratio, family = binomial()),
data = polis,
prior = priors,
sample_prior = "only", # predictive prior distribution
iter = 5000, warmup = 1000, chains = 3, thin = 5, refresh = 0)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DBOOST_NO_AUTO_PTR -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -fPIC -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
Note that for a binomial, you have to indicate the total number of trials (trials is a specific reserved word in this function!). Here, we just write trials(1) for the Bernoulli trials seen here (either presence or absence, rather than counts)
conditional_effects(polis_brm1) %>% plot(points = TRUE)
This shows us that our priors are not strong at all, in fact, they may actually be too weak!
We will skip a few models ahead of last time to fit the final model we saw last time…
polis_brm3 <- brm(bf(pa|trials(1) ~ ratio, family = binomial()),
data = polis,
prior = priors,
sample_prior = "yes", # predictive prior distribution
iter = 5000, warmup = 1000, chains = 3, thin = 5, refresh = 0)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DBOOST_NO_AUTO_PTR -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp' -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1 -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -fPIC -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
polis_brm3$fit %>% get_variables()
## [1] "b_Intercept" "b_ratio" "prior_Intercept" "prior_b"
## [5] "lp__" "accept_stat__" "stepsize__" "treedepth__"
## [9] "n_leapfrog__" "divergent__" "energy__"
polis_brm3 %>% hypothesis("ratio = 0") %>% plot()
Can see that the prior has not affected the posterior distribution, and thus were uninformative.
mcmc_plot(polis_brm3, type='combo') # good mixing of chains
mcmc_plot(polis_brm3, type='acf_bar') # No autocorrelation
mcmc_plot(polis_brm3, type='rhat_hist') # Rhat less than 1.05
mcmc_plot(polis_brm3, type='neff_hist') # Neff greater than 0.5 or 50%
ggs_crosscorrelation(ggs(polis_brm3$fit)) # no unexpected cross-correlation
ggs_grb(ggs(polis_brm3$fit)) # scale reduction
pp_check(polis_brm3, type = "dens_overlay", nsamples = 100)
pp_check(polis_brm3, x = "ratio", type = "intervals")
DHARMa residuals:
preds <- posterior_predict(polis_brm3, nsamples = 250,
summary = FALSE)
polis_resids <- createDHARMa(
simulatedResponse = t(preds), # simulated predictions/expected values for each observation
observedResponse = polis$pa, # true values
fittedPredictedResponse = apply(preds, 2, median), # get median expected value for all data points
integerResponse = TRUE) # type of distribution
plot(polis_resids)
polis_brm3 %>%
conditional_effects() %>%
plot(points = TRUE)
summary(polis_brm3)
## Family: binomial
## Links: mu = logit
## Formula: pa | trials(1) ~ ratio
## Data: polis (Number of observations: 19)
## Samples: 3 chains, each with iter = 5000; warmup = 1000; thin = 5;
## total post-warmup samples = 2400
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept 4.55 2.00 1.31 9.18 1.00 1986 1935
## ratio -0.28 0.12 -0.56 -0.09 1.00 1989 2253
##
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
(x <-tidyMCMC(polis_brm3$fit, estimate.method = "median",
conf.int = T, conf.method = "HPDinterval",
rhat = F, ess = F) %>% as.data.frame)
75.8094817 times as likely to be present rather than absent 98.6980774 % present vs. absent 98.3086951 % present vs. absent when ratio increases by 1 unit
Bayes R^2:
polis_brm3 %>%
bayes_R2(summary = FALSE) %>%
median_hdci()
What is the LD50?
LD50 = -Intercept/slope
polis_brm3$fit %>% get_variables
## [1] "b_Intercept" "b_ratio" "prior_Intercept" "prior_b"
## [5] "lp__" "accept_stat__" "stepsize__" "treedepth__"
## [9] "n_leapfrog__" "divergent__" "energy__"
LD50s <- polis_brm3$fit %>%
tidy_draws() %>%
mutate(LD50 = -b_Intercept/b_ratio)
LD50_hpd <- LD50s %>% median_hdci(LD50) %>% janitor::clean_names()
LD50s %>%
ggplot() +
geom_histogram(aes(x = LD50)) +
geom_vline(xintercept = c(LD50_hpd$lower,LD50_hpd$upper),
color = "red", linetype = "dashed") +
geom_vline(xintercept = c(LD50_hpd$ld50),
color = "red") +
scale_y_continuous(expand = expansion())
polis_grid <- with(polis, list(ratio = modelr::seq_range(ratio, n=100)))
newdata <- polis_brm3 %>%
emmeans(~ratio, at = polis_grid, type = "response") %>%
as.data.frame() %>%
rename(pa = prob, lwr = lower.HPD, upr = upper.HPD)
newdata %>%
ggplot(aes(x = ratio, y = pa)) +
geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "blue", alpha = 0.3) +
geom_line() +
geom_point(data = polis) +
labs(x = "Island ratio", y = "Presence vs. absence") +
geom_vline(xintercept = c(LD50_hpd$lower,LD50_hpd$upper),
color = "red", linetype = "dashed") +
geom_vline(xintercept = c(LD50_hpd$ld50),
color = "red") +
scale_x_continuous(expand = expansion())